In [48]:
import pandas as pd
import rpy2

In [8]:
auto = pd.read_csv('Auto.csv')

In [11]:
auto.head()


Out[11]:
mpg cylinders displacement horsepower weight acceleration year origin name
0 18 8 307 130 3504 12.0 70 1 chevrolet chevelle malibu
1 15 8 350 165 3693 11.5 70 1 buick skylark 320
2 18 8 318 150 3436 11.0 70 1 plymouth satellite
3 16 8 304 150 3433 12.0 70 1 amc rebel sst
4 17 8 302 140 3449 10.5 70 1 ford torino

5 rows × 9 columns


In [13]:
auto.describe()


Out[13]:
mpg cylinders displacement weight acceleration year origin
count 397.000000 397.000000 397.000000 397.000000 397.000000 397.000000 397.000000
mean 23.515869 5.458438 193.532746 2970.261965 15.555668 75.994962 1.574307
std 7.825804 1.701577 104.379583 847.904119 2.749995 3.690005 0.802549
min 9.000000 3.000000 68.000000 1613.000000 8.000000 70.000000 1.000000
25% 17.500000 4.000000 104.000000 2223.000000 13.800000 73.000000 1.000000
50% 23.000000 4.000000 146.000000 2800.000000 15.500000 76.000000 1.000000
75% 29.000000 8.000000 262.000000 3609.000000 17.100000 79.000000 2.000000
max 46.600000 8.000000 455.000000 5140.000000 24.800000 82.000000 3.000000

8 rows × 7 columns

Take a quick look the factors in Autos.csv


In [26]:
axes = pd.tools.plotting.scatter_matrix(auto, alpha=0.2, figsize=(14,10), diagonal='kde')


Add a mpg/weight column


In [33]:
auto['mpg_per_weight'] = auto.mpg / auto.weight
print(auto[['mpg', 'weight', 'mpg_per_weight']].head())
auto.mpg_per_weight.plot()


   mpg  weight  mpg_per_weight
0   18    3504        0.005137
1   15    3693        0.004062
2   18    3436        0.005239
3   16    3433        0.004661
4   17    3449        0.004929

[5 rows x 3 columns]
Out[33]:
<matplotlib.axes.AxesSubplot at 0x13d3e5f8>

In [34]:
auto.head(2)


Out[34]:
mpg cylinders displacement horsepower weight acceleration year origin name mpg_per_weight
0 18 8 307 130 3504 12.0 70 1 chevrolet chevelle malibu 0.005137
1 15 8 350 165 3693 11.5 70 1 buick skylark 320 0.004062

2 rows × 10 columns


In [38]:
%load_ext rmagic


The rmagic extension is already loaded. To reload it, use:
  %reload_ext rmagic

In [44]:
# this command pushes the pandas.DataFrame auto to R-land
%Rpush auto

In [61]:
%R auto2 = data.frame(auto);



In [60]:
%%R 
pint(head(auto2, 2))



In [50]:
%R pairs(auto)



In [69]:
data = np.random.randn(15000)
plt.hist(data, bins=30)
plt.ylabel('Counts')
plt.title('The Gaussian Distribution')


Out[69]:
<matplotlib.text.Text at 0x12fd6b38>

In [80]:
x = np.random.randn(5000)
y = np.random.randn(5000)

plt.plot(x, y, 'ro')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Scatter plot: Normal vs. Normal')


Out[80]:
<matplotlib.text.Text at 0x121e66d8>

In [77]:
plt.scatter(x, y)


Out[77]:
<matplotlib.collections.PathCollection at 0x12dc2048>

In [83]:
plt.boxplot([x, y])
plt.title('Two box plots, side by side')


Out[83]:
<matplotlib.text.Text at 0x26b00ba8>

In [76]:
s = np.linspace(0, 10)
plt.plot(s, s ** 2, 'r--')


Out[76]:
[<matplotlib.lines.Line2D at 0x12dc6f28>]

In [88]:
auto.plot(x='weight', y='mpg', style='bo')
plt.title('Scatterplot: Mpg vs. Weight')

plt.figure()
auto.hist('mpg')
plt.title('Histogram of mpg')


Out[88]:
<matplotlib.text.Text at 0x280c8d30>
<matplotlib.figure.Figure at 0x130fae10>

In [89]:
from pandas.tools.plotting import scatter_matrix
_ = scatter_matrix(auto[['mpg', 'cylinders', 'displacement']], figsize=(14, 10))